#!/usr/bin/env python

from Bio import SeqIO
import argparse
import sys
import os
from statistics import mean, median

# Command-line interface: one required input fasta plus an optional output path.
# The description is assembled from adjacent string literals so that source
# indentation does not end up embedded in the text.
parser = argparse.ArgumentParser(
    description=(
        "This script takes a multifasta as input and returns a tab-delimited file with two columns, "
        "header and number of bases or amino acids for each sequence. It also "
        "prints out some general stats. For version info, run `bit-version`."
    )
)

# Separate group so required options are listed under their own heading in --help.
required = parser.add_argument_group('required arguments')

required.add_argument("-i", "--input-fasta", help="Original fasta file", action="store", required=True)
parser.add_argument(
    "-o",
    "--output-file",
    help='Name of output tab-delimited file (default: "Num-bps.tsv")',
    action="store",
    default="Num-bps.tsv",
)

# When run with no arguments at all, show the full help text rather than
# argparse's error about the missing required -i option.
if len(sys.argv) == 1:
    parser.print_help(sys.stderr)
    sys.exit(0)

# Parsed options: args.input_fasta and args.output_file.
args = parser.parse_args()


# list of seq lengths, in input order, so we can print out some summary stats
lengths_list = []

# Input is opened first, so a missing input file does not create the output file.
with open(args.input_fasta, "r") as in_fasta, open(args.output_file, "w") as out_file:

    for seq_record in SeqIO.parse(in_fasta, "fasta"):

        seq_length = len(seq_record.seq)

        # one row per sequence: record id <tab> length
        out_file.write(seq_record.id + "\t" + str(seq_length) + "\n")

        lengths_list.append(seq_length)

# number of seqs
n = len(lengths_list)

# min(), max(), mean() and median() all raise on an empty list, so report an
# input with no records explicitly instead of dying with a traceback. The
# (empty) output file has already been created at this point.
if n == 0:
    sys.stderr.write("\n  No sequences were found in '" + args.input_fasta + "', so there are no stats to report.\n\n")
    sys.exit(1)

print("\n    Number of seqs:  " + str(n))
print("    Min. length:     " + str(min(lengths_list)))
print("    Max length:      " + str(max(lengths_list)))
print("    Mean length:     " + str(round(mean(lengths_list), 2)))
print("    Median length:   " + str(round(median(lengths_list), 2)) + "\n")
print("  All seq lengths written to: '" + args.output_file + "'\n")
